#devtools::install_github("https://github.com/jonlachmann/GMJMCMC/tree/stratos")
library(FBMS) 
## Loading required package: fastglm
## Loading required package: bigmemory
## Loading required package: GenSA
## Loading required package: parallel
## Loading required package: r2r
## Loading required package: BAS
## Loading required package: tolerance
## tolerance package, version 3.0.0, Released 2024-04-18
## This package is based upon work supported by the Chan Zuckerberg Initiative: Essential Open Source Software for Science (Grant No. 2020-255193).
library(tidyverse) 
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(DataExplorer)
#setwd("Task_Kepler")

Load the data and perform EDA

data = read.csv("https://raw.githubusercontent.com/OpenExoplanetCatalogue/oec_tables/master/comma_separated/open_exoplanet_catalogue.txt")


head(data)
DataExplorer::plot_str(data = data)
DataExplorer::plot_intro(data = data) 

DataExplorer::plot_missing(data = data)

DataExplorer::plot_density(data[,-2],nrow = 2,ncol = 3)

DataExplorer::plot_density(log(data[,c("eccentricity","mass","period","semimajoraxis","hoststar_radius","hoststar_mass")]),nrow = 2,ncol = 3)
## Warning in FUN(X[[i]], ...): NaNs produced

DataExplorer::plot_correlation(data[,-2],type = "continuous",cor_args = list(use = "pairwise.complete.obs", method = "pearson"))
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_text()`).

Select relevant columns for analysis

We shall keep 300 observations as a hold out test set and the rest as the training set

data <- data %>% select(semimajoraxis,mass, radius, period, eccentricity, hoststar_mass, hoststar_radius, hoststar_metallicity,hoststar_temperature,binaryflag) %>% na.omit()
summary(data)
##  semimajoraxis           mass              radius            period         
##  Min.   : 0.00916   Min.   : 0.00001   Min.   :0.01644   Min.   :     0.45  
##  1st Qu.: 0.04328   1st Qu.: 0.02541   1st Qu.:0.22626   1st Qu.:     3.35  
##  Median : 0.06010   Median : 0.27300   Median :0.85600   Median :     5.32  
##  Mean   : 0.78890   Mean   : 1.27930   Mean   :0.76944   Mean   :  1047.36  
##  3rd Qu.: 0.13000   3rd Qu.: 1.05700   3rd Qu.:1.19000   3rd Qu.:    18.36  
##  Max.   :67.96000   Max.   :79.00000   Max.   :2.08500   Max.   :166510.00  
##   eccentricity      hoststar_mass   hoststar_radius  hoststar_metallicity
##  Min.   :-0.12929   Min.   :0.089   Min.   : 0.121   Min.   :-0.92000    
##  1st Qu.: 0.00000   1st Qu.:0.840   1st Qu.: 0.820   1st Qu.:-0.06000    
##  Median : 0.00380   Median :1.000   Median : 1.020   Median : 0.06000    
##  Mean   : 0.08658   Mean   :1.001   Mean   : 1.309   Mean   : 0.05242    
##  3rd Qu.: 0.10938   3rd Qu.:1.173   3rd Qu.: 1.400   3rd Qu.: 0.18100    
##  Max.   : 0.93369   Max.   :2.820   Max.   :86.400   Max.   : 7.79000    
##  hoststar_temperature   binaryflag     
##  Min.   :2516         Min.   :0.00000  
##  1st Qu.:5078         1st Qu.:0.00000  
##  Median :5645         Median :0.00000  
##  Mean   :5472         Mean   :0.05538  
##  3rd Qu.:6000         3rd Qu.:0.00000  
##  Max.   :9360         Max.   :2.00000
set.seed(1)
te.ind <- sample.int(n = 939,size = 300,replace = F)
data.train = data[-te.ind,]
data.test = data[te.ind,]

Iteration 1: Simple Bayesian Gaussian regression model with model averaging and Jeffreys prior

set.seed(1)
blr <- FBMS::fbms(semimajoraxis ~ ., data = data.train,beta_prior = list(type = "Jeffreys-BIC"), N = 5000)
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -891.269947774591 
## New best crit in cur pop: -888.074749102749 
## New best crit in cur pop: -884.846384872744 
## New best crit in cur pop: -882.381685814075 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
plot(blr)

## [1] "done"
summary(blr,labels = names(data.train)[-1],effects = c(0.025,0.975),tol = 0)
##                    Importance | Feature
## ##############################| mass 
##                              #| radius 
## ##############################| period 
##  #############################| eccentricity 
##                              #| hoststar_mass 
##                              #| hoststar_radius 
##               ################| hoststar_metallicity 
##                              #| hoststar_temperature 
##                             ##| binaryflag 
## 
## Best log marginal posterior:  -882.3817
## $PIP
##          feats.strings marg.probs
## 1                 mass 1.00000000
## 2               period 1.00000000
## 3         eccentricity 0.99905050
## 4 hoststar_metallicity 0.53885736
## 5           binaryflag 0.06712675
## 6               radius 0.06277839
## 7        hoststar_mass 0.03812292
## 8 hoststar_temperature 0.03490408
## 9      hoststar_radius 0.03354200
## 
## $EFF
##               Covariate quant_0.025 quant_0.975
## 1             intercept      0.0677      0.1787
## 2                  mass      0.0667      0.0693
## 3                radius     -0.1039       0.111
## 4                period       5e-04       5e-04
## 5          eccentricity      0.9719      1.2406
## 6         hoststar_mass           0           0
## 7       hoststar_radius      -0.111       0.111
## 8  hoststar_metallicity     -0.5247     -0.0548
## 9  hoststar_temperature      -0.111       0.111
## 10           binaryflag      -0.084           0

Check stability

set.seed(1)
all.probs <- sapply(1:20,FUN = function(x)FBMS::fbms(semimajoraxis ~ ., data = data.train,beta_prior = list(type = "Jeffreys-BIC"), N = 5000)$marg.probs)
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -891.269947774591 
## New best crit in cur pop: -888.074749102749 
## New best crit in cur pop: -884.846384872744 
## New best crit in cur pop: -882.381685814075 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -890.830103519341 
##  |=                                       |New best crit in cur pop: -890.145133794119 
## New best crit in cur pop: -887.999964293161 
## New best crit in cur pop: -885.668152515449 
## New best crit in cur pop: -882.498478834685 
##  |==                                      |New best crit in cur pop: -882.381685814075 
##  |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1844.67644968324 
## New best crit in cur pop: -1838.96659728019 
## New best crit in cur pop: -1831.7536831939 
## New best crit in cur pop: -889.877530220413 
## New best crit in cur pop: -888.714791650645 
##  |=                                       |New best crit in cur pop: -882.498478834685 
## New best crit in cur pop: -882.381685814075 
##  |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -888.90176207462 
## New best crit in cur pop: -885.722825204211 
## New best crit in cur pop: -885.58028723222 
## New best crit in cur pop: -882.381685814075 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -893.724018620832 
## New best crit in cur pop: -891.296466484403 
## New best crit in cur pop: -888.714791650645 
## New best crit in cur pop: -882.498478834685 
##  |=                                       |New best crit in cur pop: -882.381685814075 
##  |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1845.89538777955 
## New best crit in cur pop: -1837.64593836242 
## New best crit in cur pop: -897.529909217785 
## New best crit in cur pop: -896.561498236275 
## New best crit in cur pop: -890.594325589114 
## New best crit in cur pop: -888.297531447619 
## New best crit in cur pop: -885.679345409212 
## New best crit in cur pop: -882.498478834685 
##  |=                                       |New best crit in cur pop: -882.381685814075 
##  |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1845.89538777955 
## New best crit in cur pop: -1838.96659728019 
## New best crit in cur pop: -894.737656440886 
## New best crit in cur pop: -893.306992813025 
## New best crit in cur pop: -882.498478834685 
## New best crit in cur pop: -882.381685814075 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -887.674331079679 
## New best crit in cur pop: -882.498478834685 
##  |=                                       | |==                                      | |===                                     |New best crit in cur pop: -882.381685814075 
##  |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1847.04008886036 
## New best crit in cur pop: -1844.55854048174 
## New best crit in cur pop: -1841.85686280826 
## New best crit in cur pop: -886.954933511773 
## New best crit in cur pop: -884.846384872744 
## New best crit in cur pop: -882.381685814075 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -909.164517121934 
## New best crit in cur pop: -887.674331079679 
## New best crit in cur pop: -884.846384872744 
## New best crit in cur pop: -882.498478834685 
##  |=                                       |New best crit in cur pop: -882.381685814075 
##  |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1841.67415605265 
## New best crit in cur pop: -1838.96659728019 
## New best crit in cur pop: -894.092667392015 
## New best crit in cur pop: -893.034095251073 
##  |=                                       |New best crit in cur pop: -890.830103519341 
## New best crit in cur pop: -887.674503762361 
## New best crit in cur pop: -885.595154130658 
## New best crit in cur pop: -882.381685814075 
##  |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -882.498478834685 
## New best crit in cur pop: -882.381685814075 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -888.714791650645 
## New best crit in cur pop: -882.498478834685 
## New best crit in cur pop: -882.381685814075 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -888.714791650645 
## New best crit in cur pop: -882.498478834685 
##  |=                                       |New best crit in cur pop: -882.381685814075 
##  |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1847.26095862069 
## New best crit in cur pop: -1844.55922698962 
## New best crit in cur pop: -1841.51759483189 
## New best crit in cur pop: -1838.96659728019 
## New best crit in cur pop: -885.668152515449 
## New best crit in cur pop: -882.498478834685 
## New best crit in cur pop: -882.381685814075 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -891.98757372902 
## New best crit in cur pop: -889.877530220413 
## New best crit in cur pop: -882.381685814075 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1836.88756940836 
## New best crit in cur pop: -897.144778754016 
## New best crit in cur pop: -889.886123470089 
## New best crit in cur pop: -887.674503762361 
## New best crit in cur pop: -885.595154130658 
## New best crit in cur pop: -882.498478834685 
##  |=                                       |New best crit in cur pop: -882.381685814075 
##  |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1831.39016843754 
## New best crit in cur pop: -889.877530220413 
## New best crit in cur pop: -888.714791650645 
##  |=                                       |New best crit in cur pop: -882.498478834685 
## New best crit in cur pop: -882.381685814075 
##  |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -892.407245672271 
## New best crit in cur pop: -890.166523026977 
## New best crit in cur pop: -887.674331079679 
## New best crit in cur pop: -884.846384872744 
## New best crit in cur pop: -882.498478834685 
## New best crit in cur pop: -882.381685814075 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -890.84613555129 
## New best crit in cur pop: -890.166523026977 
## New best crit in cur pop: -887.674331079679 
## New best crit in cur pop: -884.846384872744 
## New best crit in cur pop: -882.498478834685 
## New best crit in cur pop: -882.381685814075 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
chain_means <- rowMeans(all.probs)
chain_sd <- sapply(1:9, function(i) sd(all.probs[i,]))

alpha_upper <- chain_means + 1.96*chain_sd
alpha_lower <- chain_means - 1.96*chain_sd

stability <- data.frame(covariate = names(data)[-1],one.chain = round(blr$marg.probs[1,],4),mean = round(chain_means,4),lower = round(alpha_lower,4),upper = round(alpha_upper,4))

print(stability)
##              covariate one.chain   mean  lower  upper
## 1                 mass    1.0000 1.0000 1.0000 1.0000
## 2               radius    0.0628 0.0621 0.0487 0.0756
## 3               period    1.0000 1.0000 1.0000 1.0000
## 4         eccentricity    0.9991 0.9991 0.9989 0.9994
## 5        hoststar_mass    0.0381 0.0414 0.0360 0.0468
## 6      hoststar_radius    0.0335 0.0301 0.0133 0.0468
## 7 hoststar_metallicity    0.5389 0.5415 0.5273 0.5556
## 8 hoststar_temperature    0.0349 0.0326 0.0209 0.0443
## 9           binaryflag    0.0671 0.0700 0.0521 0.0879

Bayesian Linear regression BMA results appear fairly stable, allowing us to continue with their discussion.

Marginal Inclusion Probabilities

The marginal inclusion probabilities indicate the likelihood that each predictor is included in the model. High probabilities suggest that the predictor is likely important for explaining the response variable.

  1. period is included in the model with absolute certainty. The orbital period is a fundamental characteristic of an orbit and strongly influences the semimajor axis due to Kepler’s third law, which relates the square of the period to the cube of the semimajor axis.

  2. mass is also almost certainly included in the model. The mass of the orbiting body can affect the dynamics of the system, influencing the semimajor axis through gravitational interactions.

  3. eccentricity has a high inclusion probability. The eccentricity of an orbit describes its deviation from a perfect circle, which can affect the semimajor axis as it alters the orbital shape.

  4. hoststar_metallicity has a posterior inclusion above 0.5 and would be stably a part of the MPM. Hard to interpret its input on the semi major axes of the planet.

    …

  5. hoststar_mass has a posterior inclusion below 0.0001 that would indicate it is not important, which is a bit counter-intuitive as it should hold the planetary system together.

Quantiles of Effect Sizes

The quantiles of model averaged effect sizes of posterior modes across all models provide the 2.5% and 97.5% quantiles for the posterior distribution of each coefficient. We shall look at the predictors with marginal inclusion probabilities above 0.5

  1. mass: The positive interval indicates that as the mass increases, the semimajor axis is expected to increase. This makes sense physically, as a larger mass could imply a more substantial gravitational influence, potentially affecting the orbit’s size.

  2. period: The very narrow interval indicates a precise positive effect of the orbital period on the semimajor axis, consistent with Kepler’s third law.

  3. eccentricity: The wide interval, spanning from zero to a significant positive value, indicates uncertainty about the effect of eccentricity, but it can have a large positive effect.

    But let us additionally look at the 95% CrI for the median probability model, which under Jeffreys priors approximately correspond to the 95% CI, allowing us to easily obtain the estimates using the lm function in R

    confint(lm(semimajoraxis ~ 1 + mass + period + eccentricity+hoststar_metallicity, data = data.train))
    ##                              2.5 %        97.5 %
    ## (Intercept)           0.0486771880  0.2210518564
    ## mass                  0.0509147037  0.0872969812
    ## period                0.0004529113  0.0004694903
    ## eccentricity          0.6680098468  1.6437789689
    ## hoststar_metallicity -0.8560986018 -0.1167353490

essentially supporting the conclusions of model averaged posterior modes.

Make predictions

preds.train <- predict(blr, as.matrix(data.train[,-1]), link = function(x) x)
preds.test <- predict(blr, as.matrix(data.test[,-1]))
r.blr <- round(c(cor(data.train[,1],preds.train$mean)^2,cor(data.test[,1],preds.test$mean)^2),3)
plot(x = data.train[, 1], preds.train$mean, xlab = "data", ylab = "prediction", main = "Title of the Plot", col = "blue", pch = 16)
points(x = data.test[, 1], preds.test$mean, col = "red", pch = 17)
legend("topright", legend = c(paste0("Training Data, R.sqr = ",r.blr[1]), paste0("Test Data, R.sqr = ",r.blr[2])), col = c("blue", "red"), pch = c(16, 17))

The predictions show extremely good predictive ability of the model for both training and testing data. Yet, let us reflect on potential pitfalls of the model do decide if we are interested in it.

Possible criticism

The finding that the host star’s mass is not important in predicting the semimajor axis does seem counterintuitive, as the mass of the host star should, in theory, have a significant influence on the orbit of the planets around it. Here are some potential reasons and considerations to critically evaluate this finding and whether it motivates the use of a more complex model:

  1. Data Quality and Completeness:

    • Missing or Inaccurate Data? Yet we assumed missing at random for all missing data. But if there are inaccuracies or biases in missing data for host star mass, might it explain why this predictor is not showing importance? We believe it is not important even if we were missing the systems with high or low solar mass systematically as the solar mass would still be much higher than the mass of a planet. The only way it could be important is if the solar mass was constant or having close to zero variance. Yet as we saw in EDA, this is not the case.
  2. Confounding Variables:

    • Collinearity: If host star mass is highly correlated with other predictors (e.g., period or eccentricity), its unique contribution might be overshadowed, leading to an underestimation of its importance. Again, this is not the case according to EDA.
  3. Model Simplicity:

    • Linear Model Limitations: A simple linear model might not capture the complex relationships between host star mass and the semimajor axis, particularly if the relationship is non-linear or interacts with other variables. We could try more complex models. Multiplicative effects for the original model could be testes by a model with log transformed responses. Additive non-linearities can be tested through through fractional polynomials. Finally, more complicated non-linear relationships with both non-linearities and interactions can be tested by a symbolic regression on BGNLM. Let us dig into this.

Iteration 2: Bayesian Gaussian regression model with model averaging and Jeffreys and prior log-transformed response

Inference

set.seed(1)
log.data.train <- log(abs(data.train)+.Machine$double.xmin)
log.data.test <- log(abs(data.test)+.Machine$double.xmin)
blr.log <- FBMS::fbms( semimajoraxis ~ .,beta_prior = list(type = "Jeffreys-BIC"), data = log.data.train, N = 5000)
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: 957.794999509247 
## New best crit in cur pop: 1079.69314258627 
## New best crit in cur pop: 1082.61084412177 
## New best crit in cur pop: 1082.97920083271 
## New best crit in cur pop: 1085.87055371922 
## New best crit in cur pop: 1088.75331680507 
## New best crit in cur pop: 1088.88793834687 
## New best crit in cur pop: 1089.10049406151 
## New best crit in cur pop: 1091.0850487173 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
plot(blr.log)

## [1] "done"
summary(blr.log,labels = names(data.train)[-1],effects = c(0.025,0.975))
##                    Importance | Feature
##                              #| mass 
##                               | radius 
## ##############################| period 
##                              #| eccentricity 
## ##############################| hoststar_mass 
##                           ####| hoststar_radius 
##  #############################| hoststar_metallicity 
##                          #####| hoststar_temperature 
##                               | binaryflag 
## 
## Best log marginal posterior:  1091.085
## $PIP
##          feats.strings  marg.probs
## 1               period 1.000000000
## 2        hoststar_mass 1.000000000
## 3 hoststar_metallicity 0.993108251
## 4 hoststar_temperature 0.168649777
## 5      hoststar_radius 0.146866107
## 6         eccentricity 0.045620776
## 7                 mass 0.042970842
## 8               radius 0.028760601
## 9           binaryflag 0.006359678
## 
## $EFF
##               Covariate quant_0.025 quant_0.975
## 1             intercept      -4.446     -3.9319
## 2                  mass           0       5e-04
## 3                radius     -0.5141      0.5141
## 4                period      0.6667      0.6667
## 5          eccentricity     -0.5141      0.5141
## 6         hoststar_mass      0.2869      0.3314
## 7       hoststar_radius     -0.4978      0.5212
## 8  hoststar_metallicity           0           0
## 9  hoststar_temperature     -0.4547      0.5141
## 10           binaryflag           0           0

Check stability

set.seed(1)
all.probs <- sapply(1:20,FUN = function(x)FBMS::fbms(semimajoraxis ~ .,beta_prior = list(type = "Jeffreys-BIC"), data = log.data.train, N = 5000)$marg.probs)
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: 957.794999509247 
## New best crit in cur pop: 1079.69314258627 
## New best crit in cur pop: 1082.61084412177 
## New best crit in cur pop: 1082.97920083271 
## New best crit in cur pop: 1085.87055371922 
## New best crit in cur pop: 1088.75331680507 
## New best crit in cur pop: 1088.88793834687 
## New best crit in cur pop: 1089.10049406151 
## New best crit in cur pop: 1091.0850487173 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1096.16078245604 
## New best crit in cur pop: -1095.33156879494 
## New best crit in cur pop: -1089.58881316014 
## New best crit in cur pop: 1078.37443524742 
## New best crit in cur pop: 1080.69960264804 
## New best crit in cur pop: 1083.90581550079 
## New best crit in cur pop: 1088.3580136576 
## New best crit in cur pop: 1091.0850487173 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: 1081.88863019303 
## New best crit in cur pop: 1084.65391895679 
## New best crit in cur pop: 1086.46574674056 
##  |=                                       |New best crit in cur pop: 1091.0850487173 
##  |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: 1088.3580136576 
## New best crit in cur pop: 1091.0850487173 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1104.65422466565 
## New best crit in cur pop: -1093.38417178757 
## New best crit in cur pop: 1079.67188566859 
## New best crit in cur pop: 1081.91514623497 
## New best crit in cur pop: 1085.13295937712 
## New best crit in cur pop: 1087.99874200791 
## New best crit in cur pop: 1091.0850487173 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1101.58872250816 
## New best crit in cur pop: -1098.37503640339 
## New best crit in cur pop: 730.201147513271 
## New best crit in cur pop: 732.951567336542 
## New best crit in cur pop: 736.596896242667 
## New best crit in cur pop: 739.488528394121 
##  |=                                       |New best crit in cur pop: 951.120540852387 
## New best crit in cur pop: 952.258837448927 
## New best crit in cur pop: 1072.44809611872 
## New best crit in cur pop: 1077.33039283092 
## New best crit in cur pop: 1079.38261342416 
## New best crit in cur pop: 1079.7763671763 
## New best crit in cur pop: 1082.61187785025 
## New best crit in cur pop: 1087.99874200791 
## New best crit in cur pop: 1091.0850487173 
##  |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1090.3747486322 
## New best crit in cur pop: -1089.58881316014 
## New best crit in cur pop: 1078.37443524742 
## New best crit in cur pop: 1080.69960264804 
## New best crit in cur pop: 1082.90150241412 
## New best crit in cur pop: 1085.14166282579 
## New best crit in cur pop: 1085.46269235986 
## New best crit in cur pop: 1088.3580136576 
##  |=                                       |New best crit in cur pop: 1091.0850487173 
##  |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: 1077.62245204581 
## New best crit in cur pop: 1079.77145578101 
## New best crit in cur pop: 1080.55995447403 
## New best crit in cur pop: 1082.61187785025 
## New best crit in cur pop: 1083.00135217869 
## New best crit in cur pop: 1085.14166282579 
## New best crit in cur pop: 1088.3580136576 
## New best crit in cur pop: 1091.0850487173 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1130.03613131586 
## New best crit in cur pop: -1126.91799911376 
## New best crit in cur pop: 1084.76885767843 
## New best crit in cur pop: 1087.85760975969 
## New best crit in cur pop: 1091.0850487173 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1096.16078245604 
## New best crit in cur pop: 1077.33039283092 
## New best crit in cur pop: 1079.38261342416 
## New best crit in cur pop: 1079.69314258627 
## New best crit in cur pop: 1082.61084412177 
## New best crit in cur pop: 1082.97920083271 
##  |=                                       |New best crit in cur pop: 1087.99874200791 
## New best crit in cur pop: 1091.0850487173 
##  |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: 1083.78519540375 
## New best crit in cur pop: 1083.93702719793 
## New best crit in cur pop: 1084.65391895679 
## New best crit in cur pop: 1086.46574674056 
##  |=                                       |New best crit in cur pop: 1091.0850487173 
##  |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: 1080.67658654633 
## New best crit in cur pop: 1085.13295937712 
## New best crit in cur pop: 1087.85760975969 
## New best crit in cur pop: 1087.99874200791 
## New best crit in cur pop: 1091.0850487173 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1090.32917976045 
## New best crit in cur pop: -1089.58881316014 
## New best crit in cur pop: 1077.33039283092 
## New best crit in cur pop: 1079.38261342416 
## New best crit in cur pop: 1080.09216059786 
## New best crit in cur pop: 1082.30025668189 
## New best crit in cur pop: 1083.32158087464 
## New best crit in cur pop: 1085.5297818783 
## New best crit in cur pop: 1085.87094049572 
## New best crit in cur pop: 1087.85709207507 
##  |=                                       |New best crit in cur pop: 1087.99874200791 
## New best crit in cur pop: 1091.0850487173 
##  |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: 455.113148590775 
## New best crit in cur pop: 461.395484200828 
## New best crit in cur pop: 1077.60989022328 
## New best crit in cur pop: 1080.11834705106 
## New best crit in cur pop: 1083.2383843682 
## New best crit in cur pop: 1087.85760975969 
##  |=                                       |New best crit in cur pop: 1091.0850487173 
##  |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1117.31391926091 
## New best crit in cur pop: -1114.79856223435 
## New best crit in cur pop: 1077.33039283092 
## New best crit in cur pop: 1079.38261342416 
## New best crit in cur pop: 1082.30025668189 
## New best crit in cur pop: 1085.5297818783 
## New best crit in cur pop: 1085.87094049572 
## New best crit in cur pop: 1087.99874200791 
## New best crit in cur pop: 1091.0850487173 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1111.67573314726 
## New best crit in cur pop: -1111.11259868173 
## New best crit in cur pop: -1110.56369634291 
## New best crit in cur pop: -1089.58881316014 
## New best crit in cur pop: 1077.33039283092 
## New best crit in cur pop: 1079.38261342416 
## New best crit in cur pop: 1079.77145578101 
## New best crit in cur pop: 1082.64099003929 
##  |=                                       |New best crit in cur pop: 1085.5297818783 
## New best crit in cur pop: 1085.67356841452 
## New best crit in cur pop: 1087.99874200791 
## New best crit in cur pop: 1091.0850487173 
##  |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1124.04963922226 
## New best crit in cur pop: 1085.7542999407 
## New best crit in cur pop: 1088.88793834687 
## New best crit in cur pop: 1091.0850487173 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1101.99917200549 
## New best crit in cur pop: 473.839370663609 
## New best crit in cur pop: 476.937748758556 
## New best crit in cur pop: 478.680750395656 
## New best crit in cur pop: 1082.90150241412 
## New best crit in cur pop: 1085.14166282579 
## New best crit in cur pop: 1085.46269235986 
## New best crit in cur pop: 1088.3580136576 
## New best crit in cur pop: 1091.0850487173 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1111.22564781037 
## New best crit in cur pop: -1090.3747486322 
## New best crit in cur pop: 1080.76990373765 
## New best crit in cur pop: 1082.9978883046 
## New best crit in cur pop: 1083.24517586196 
## New best crit in cur pop: 1086.46574674056 
## New best crit in cur pop: 1091.0850487173 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
## Data (y) coerced to matrix type.
## 
## MJMCMC begin.
## New best crit in cur pop: -1133.87106277544 
## New best crit in cur pop: -1127.62578571715 
## New best crit in cur pop: 1083.37502125865 
## New best crit in cur pop: 1086.20863598236 
## New best crit in cur pop: 1086.22602298498 
## New best crit in cur pop: 1088.75331680507 
## New best crit in cur pop: 1091.0850487173 
##  |=                                       | |==                                      | |===                                     | |====                                    | |=====                                   | |======                                  | |=======                                 | |========                                | |=========                               | |==========                              | |===========                             | |============                            | |=============                           | |==============                          | |===============                         | |================                        | |=================                       | |==================                      | |===================                     | |====================                    | |=====================                   | |======================                  | |=======================                 | |========================                | |=========================               | |==========================              | |===========================             | |============================            | |=============================           | |==============================          | |===============================         | |================================        | |=================================       | |==================================      | |===================================     | |====================================    | |=====================================   | |======================================  | |======================================= | |========================================|
## MJMCMC done.
chain_means <- rowMeans(all.probs)
chain_sd <- sapply(1:9, function(i) sd(all.probs[i,]))

alpha_upper <- chain_means + 1.96*chain_sd
alpha_lower <- chain_means - 1.96*chain_sd

stability <- data.frame(covariate = names(data)[-1],one.chain = round(blr.log$marg.probs[1,],4),mean = round(chain_means,4),lower = round(alpha_lower,4),upper = round(alpha_upper,4))

print(stability)
##              covariate one.chain   mean  lower  upper
## 1                 mass    0.0461 0.0462 0.0458 0.0465
## 2               radius    0.0377 0.0371 0.0332 0.0410
## 3               period    1.0000 1.0000 1.0000 1.0000
## 4         eccentricity    0.0601 0.0586 0.0558 0.0613
## 5        hoststar_mass    1.0000 1.0000 1.0000 1.0000
## 6      hoststar_radius    0.1531 0.1520 0.1476 0.1563
## 7 hoststar_metallicity    0.9932 0.9927 0.9913 0.9941
## 8 hoststar_temperature    0.1719 0.1711 0.1650 0.1773
## 9           binaryflag    0.0364 0.0361 0.0331 0.0391

The results appear stable

1. Inclusion Probabilities:

  • High Inclusion Probabilities: log of Period has perfect inclusion probabilities, indicating they are crucial predictors for the log-transformed semimajor axis.

  • Strong Predictors: log of hoststar_mass is also above 0.5 and thus corresponds to the median probability model

2. Effect Sizes:

  • Period: around 0.66 corresponding to the power of 2/3 on an orignial scale

  • Hoststar Mass: around 0.33 corresponding to the power of 1/3 on an original scale

    let us also fit the median probability model here

    mpm <- lm(semimajoraxis ~ 1 + period + hoststar_mass, data = log.data.train)
    summary(mpm)
    ## 
    ## Call:
    ## lm(formula = semimajoraxis ~ 1 + period + hoststar_mass, data = log.data.train)
    ## 
    ## Residuals:
    ##      Min       1Q   Median       3Q      Max 
    ## -0.40033 -0.00400 -0.00245 -0.00120  0.48549 
    ## 
    ## Coefficients:
    ##                 Estimate Std. Error t value Pr(>|t|)    
    ## (Intercept)   -3.9309479  0.0026783 -1467.7   <2e-16 ***
    ## period         0.6666242  0.0008239   809.1   <2e-16 ***
    ## hoststar_mass  0.3310820  0.0047097    70.3   <2e-16 ***
    ## ---
    ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    ## 
    ## Residual standard error: 0.04356 on 636 degrees of freedom
    ## Multiple R-squared:  0.9991, Adjusted R-squared:  0.9991 
    ## F-statistic: 3.379e+05 on 2 and 636 DF,  p-value: < 2.2e-16
    confint(mpm)
    ##                    2.5 %     97.5 %
    ## (Intercept)   -3.9362072 -3.9256886
    ## period         0.6650062  0.6682421
    ## hoststar_mass  0.3218336  0.3403305

corroborating the model averaged conclusions.

  • Better Fit to Physical Laws: The transformed model better aligns with astrophysical principles, making the results more interpretable and reliable. On the original scale we get very close to the functional form of the third Kepller’s law stating that \[a \propto (P^2*M)^{1/3}\]with \[a\] being the semi major axis, \[P\] being the period of rotation and \[M\] being the solar mass.

Predictions

preds.train.log <- predict(blr.log, as.matrix(log.data.train[,-1]),link = exp)
preds.test.log <- predict(blr.log, as.matrix(log.data.test[,-1]),link = exp)
r.blr.log <- round(c(cor(data.train[,1],preds.train.log$mean)^2,cor(data.test[,1],preds.test.log$mean)^2),3)
print(r.blr.log)
## [1] 1 1
plot(x = data.train[, 1], preds.train.log$mean,ylim = c(min(preds.train$mean),max(preds.train$mean)), xlab = "data", ylab = "prediction", main = "Title of the Plot", col = "blue", pch = 16)
points(x = data.test[, 1], preds.test.log$mean, col = "red", pch = 17)
legend("topright", legend = c(paste0("Training Data, R.sqr = ",r.blr.log[1]), paste0("Test Data, R.sqr = ",r.blr.log[2])), col = c("blue", "red"), pch = c(16, 17))

The model as expected gave perfect predictions on the original scale as we essentially recovered the law.

Here, we were somewhat lucky since the law was easy to discover on the log-transformed scale.

Let us still finish the steps that we were planning to do prior to transforming the covariates and the responses. We proceed on the original scale.

Iteration 3: More complex functional forms with Bayesian methods

Bayesian fractional polynomials

transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2","p0p0","p0p05","p0p1","p0p2","p0p3","p0p05","p0pm05","p0pm1","p0pm2")
probs <- gen.probs.gmjmcmc(transforms)
probs$gen <- c(0,1,0,1) # Only modifications!
params <- gen.params.gmjmcmc(ncol(data.train)-1)
params$feat$D <- 1   # Set depth of features to 1
set.seed(1)
bfp <- FBMS::fbms(semimajoraxis ~ ., data = data.train,beta_prior = list(type = "Jeffreys-BIC"),method = "gmjmcmc.parallel",transforms = transforms,runs = 20,cores = 10,P = 20, probs = probs, params = params)
plot(bfp)

summary(bfp,labels = names(data.train)[-1])

Check convergence

diagn_plot(bfp,window = 10,FUN = median)

## $stat
##  [1] -882.498479 -885.104130 -884.846385 -882.381686 -882.381686 -882.381686
##  [7] -840.175326 -307.693898 -213.588380  -79.773830  -13.458606 -261.414603
## [13]   -5.226501  -53.179135    2.650900  -24.225416   34.810817   38.572304
## [19]   23.076000  -29.214235
## 
## $lower
##  [1] -882.4985 -888.7153 -887.6604 -885.2594 -885.1315 -884.9836 -872.1848
##  [8] -703.3831 -749.0891 -729.7020 -743.2363 -995.7025 -772.1189 -807.7916
## [15] -715.9015 -659.6048 -462.4664 -201.8907 -172.4377 -193.3692
## 
## $upper
##  [1] -882.49848 -881.49295 -882.03234 -879.50400 -879.63184 -879.77981
##  [7] -808.16589   87.99533  321.91232  570.15431  716.31907  472.87327
## [13]  761.66588  701.43333  721.20328  611.15396  532.08806  279.03535
## [19]  218.58967  134.94072
diagn_plot(bfp,window = 10,FUN = max)

## $stat
##  [1] -882.38169 -882.38169 -882.38169 -882.38169 -234.99816 -291.49478
##  [7]   60.52627   79.07831   76.69331   73.89629   77.84184   74.31885
## [13]   84.64836   87.03875   76.32569   77.09571   82.77481   76.74889
## [19]   77.74232   71.42645
## 
## $lower
##  [1] -882.38169 -882.38169 -882.38169 -882.38169 -802.44441 -919.11215
##  [7] -730.64548 -780.76034 -808.61050 -816.56372 -808.57924 -764.27419
## [13] -662.82365 -503.86397 -194.66062 -140.91584   69.11876   68.41815
## [19]   69.39324   62.08285
## 
## $upper
##  [1] -882.38169 -882.38169 -882.38169 -882.38169  332.44809  336.12259
##  [7]  851.69801  938.91696  961.99712  964.35631  964.26292  912.91188
## [13]  832.12038  677.94147  347.31201  295.10727   96.43086   85.07963
## [19]   86.09140   80.77006

We see convergence after 17th generation of GMJMCMCwe see the same important effects as in a shorter run. Let us check convergence

Convergence seems rather stable for this class of models on this data set and with these tuning parameters of the sampler.

1. Importance of Period:

  • The predictor period has an extremely high inclusion probability, indicating it is almost certainly a key predictor for the semimajor axis.

  • Fractional polynomial transformations of period, specifically p0p05(period) andp3(period) also show notable inclusion probability. This suggests that non-linear transformations of periodare important for capturing the relationship with the semimajor axis.

2. Other Predictors:

Predictors such as mass, radius, hoststar_mass, and some transformations of them seem important and while all of them make sense in terms of the true law, the true law was not feasible in the space of fractional polynomials.

hoststar_metallicity, hoststar_temperature, hoststar_radius, and eccentricity have very low inclusion probabilities, all less than 0.001.

But let us look at MPM here as the model, just like for the model averaged effect, we see positive effect of period and its polynomial term, which makes sense.

confint(lm(semimajoraxis ~ 1 + period + p0p05(period) + p3(period) + hoststar_mass + p0p3(hoststar_mass) + radius + p0pm1(mass), data = data.train))
##                             2.5 %        97.5 %
## (Intercept)         -1.309135e-01 -1.175952e-03
## period               1.953770e-04  2.196413e-04
## p0p05(period)        7.287096e-03  7.702851e-03
## p3(period)          -1.002925e-15 -4.866752e-16
## hoststar_mass        5.034157e-02  2.006184e-01
## p0p3(hoststar_mass)  2.225419e-02  4.862316e-02
## radius              -7.607112e-02 -4.939350e-03
## p0pm1(mass)          2.675449e-06  3.317275e-06

Predictions

preds.train.bfp <- predict(bfp, data.train[,-1])
preds.test.bfp <- predict(bfp, data.test[,-1])
r.bfp <- round(c(cor(data.train[,1],preds.train.bfp$aggr$mean)^2,cor(data.test[,1],preds.test.bfp$aggr$mean)^2),3)
plot(x = data.train[, 1], preds.train.bfp$aggr$mean, xlab = "data", ylab = "prediction", main = "Title of the Plot", col = "blue", pch = 16)
points(x = data.test[, 1], preds.test.bfp$aggr$mean, col = "red", pch = 17)
legend("topright", legend = c(paste0("Training Data, R.sqr = ",r.bfp[1]), paste0("Test Data, R.sqr = ",r.bfp[2])), col = c("blue", "red"), pch = c(16, 17))

The predictions are excellent and even slightly better than for the linear model, yet worse than for the log transformed data used in the linear model.

The limitation of a BFP model is the lack of interactions. So let us try out Bayesian generalized nonlinear models that allow to both model non-linearity and interactions.

Bayesian generalized nonlinear models

transforms <- c("sin_deg","exp_dbl","p0","troot","p3")
probs <- gen.probs.gmjmcmc(transforms)
params <- gen.params.gmjmcmc(ncol(data.train)-1)
params$loglik$var = "unknown"
set.seed(1)
bgnlm <- FBMS::fbms(semimajoraxis ~ ., data = data.train,method = "gmjmcmc.parallel",beta_prior = list(type = "Jeffreys-BIC"),transforms = transforms,runs = 20,cores = 8,P = 20, probs = probs, params = params)
plot(bgnlm)

summary(bgnlm,labels = names(data.train)[-1])

check convergence

diagn_plot(bgnlm,window = 10,FUN = median)

## $stat
##  [1] -882.49848 -882.49848 -882.38169 -882.38169 -882.38169 -882.38169
##  [7] -882.38169 -266.55466 -216.29026  -28.14767   73.49174  140.89935
## [13]  181.63330  170.21288  119.62195  322.37726  373.74837  277.13581
## [19]  170.21084  303.89412
## 
## $lower
##  [1] -882.49848 -882.49848 -882.51385 -882.51385 -882.50707 -882.49989
##  [7] -882.49338 -693.31631 -770.82359 -712.24141 -709.83556 -729.88691
## [13] -739.11013 -753.86412 -756.96472 -494.06437 -317.08038 -124.88826
## [19] -151.92605   70.98745
## 
## $upper
##  [1] -882.4985 -882.4985 -882.2495 -882.2495 -882.2563 -882.2635 -882.2700
##  [8]  160.2070  338.2431  655.9461  856.8190 1011.6856 1102.3767 1094.2899
## [15]  996.2086 1138.8189 1064.5771  679.1599  492.3477  536.8008
diagn_plot(bgnlm,window = 10,FUN = max)

## $stat
##  [1] -882.38169 -882.38169 -246.41327  -97.54122  650.83817 -107.36788
##  [7]  432.36952  438.26522  467.77824  752.92557  719.95926 1217.40159
## [13] 1187.52639 1206.99073 1253.71689 1304.36290 1251.45641 1268.97710
## [19] 1109.74589 1186.79588
## 
## $lower
##  [1]  -882.38169  -882.38169  -966.06606  -910.20250  -598.75852 -1234.71815
##  [7]  -717.77635  -704.88182  -661.25842  -415.69863  -456.74935    69.29097
## [13]   224.48276   290.35200   400.30488   393.85450   530.65911   602.67583
## [19]   554.43565   789.67991
## 
## $upper
##  [1] -882.3817 -882.3817  473.2395  715.1201 1900.4349 1019.9824 1582.5154
##  [8] 1581.4123 1596.8149 1921.5498 1896.6679 2365.5122 2150.5700 2123.6295
## [15] 2107.1289 2214.8713 1972.2537 1935.2784 1665.0561 1583.9118

we see possible convergence for the later generations of GMJMCMC, but let us increase the compute to check if it is indeed so. The limitation of the convergence statistics is that it may show perfect convergence upon algorithm stuck in a good mode and not mixing futher across the modes.

set.seed(1)
bgnlm <- FBMS::fbms(semimajoraxis ~ ., data = data.train,method = "gmjmcmc.parallel",beta_prior = list(type = "Jeffreys-BIC"),transforms = transforms,runs = 64,cores = 8,P = 40,probs = probs, params = params)
plot(bgnlm)

summary(bgnlm,labels = names(data.train)[-1])
diagn_plot(bgnlm,window = 10,FUN = median)

## $stat
##  [1] -882.49848 -882.49848 -882.38169 -882.38169 -882.38169 -882.38169
##  [7] -863.81427 -580.85732 -315.40354 -247.18504  -34.10497  -36.75010
## [13]   36.76524  118.04771  133.10835  253.21845  313.48618  367.67746
## [19]  303.53920  306.22489  315.15595  332.65757  377.64811  362.00800
## [25]  382.74654  558.94770  566.08523  569.76331  511.06889  550.87218
## [31]  565.98585  574.52006  595.31072  517.75564  655.30887  733.82367
## [37]  648.34714  648.77358  678.62936  626.94302
## 
## $lower
##  [1] -882.49848 -882.49848 -882.51385 -882.51385 -882.50707 -882.49989
##  [7] -877.59821 -788.37704 -710.65314 -746.68424 -661.60371 -746.29806
## [13] -732.64780 -689.76760 -672.14346 -528.37933 -394.81173 -197.34573
## [19] -140.56306  -71.65676   20.25169   65.92739  156.96180  189.03910
## [25]  242.92113  404.38016  380.87711  359.92648  293.75374  333.96478
## [31]  356.13135  379.40035  418.94685  363.92393  523.45303  609.90259
## [37]  519.58322  516.65496  539.77074  502.83455
## 
## $upper
##  [1] -882.49848 -882.49848 -882.24952 -882.24952 -882.25631 -882.26348
##  [7] -850.03033 -373.33760   79.84605  252.31416  593.39376  672.79787
## [13]  806.17828  925.86302  938.36016 1034.81623 1021.78409  932.70064
## [19]  747.64146  684.10653  610.06020  599.38775  598.33442  534.97689
## [25]  522.57194  713.51525  751.29334  779.60014  728.38404  767.77957
## [31]  775.84035  769.63977  771.67460  671.58735  787.16472  857.74474
## [37]  777.11105  780.89220  817.48798  751.05149
diagn_plot(bgnlm,window = 10,FUN = max)

## $stat
##  [1] -882.38169 -882.38169 -882.38169   49.25869   87.34507  351.91830
##  [7]  380.70720  612.72366  691.23858  785.32710  815.56827 1290.96335
## [13] 1237.11614 1369.06289 1409.75331 1400.75334 1400.42406 1587.82787
## [19] 1587.82787 1536.06076 1587.82787 1587.82787 1587.82787 1584.83012
## [25] 1587.82787 1578.22416 1587.82787 1584.82329 1587.82787 1555.13494
## [31] 1587.82787 1556.28954 1587.82787 1547.25069 1587.82787 1587.82787
## [37] 1587.82787 1587.82787 1587.82787 1587.82787
## 
## $lower
##  [1] -882.38169 -882.38169 -882.38169 -863.73210 -933.57060 -788.60073
##  [7] -796.10473 -631.36361 -594.54006 -533.49497 -518.69214  -45.20614
## [13]   41.82059  463.32388  521.32829  593.69662  651.32162  907.76237
## [19]  959.03066  995.47285 1148.45399 1334.64285 1347.69440 1398.54378
## [25] 1422.69466 1432.35457 1476.71577 1554.60277 1557.60735 1521.47936
## [31] 1568.55433 1531.54206 1563.08038 1516.24794 1556.48116 1556.48116
## [37] 1555.84780 1555.84780 1555.55456 1555.55456
## 
## $upper
##  [1] -882.3817 -882.3817 -882.3817  962.2495 1108.2607 1492.4373 1557.5191
##  [8] 1856.8109 1977.0172 2104.1492 2149.8287 2627.1328 2432.4117 2274.8019
## [15] 2298.1783 2207.8101 2149.5265 2267.8934 2216.6251 2076.6487 2027.2017
## [22] 1841.0129 1827.9613 1771.1165 1752.9611 1724.0937 1698.9400 1615.0438
## [29] 1618.0484 1588.7905 1607.1014 1581.0370 1612.5754 1578.2534 1619.1746
## [36] 1619.1746 1619.8079 1619.8079 1620.1012 1620.1012

Median probability model

confint(lm(semimajoraxis ~ 1 +I(troot(period*period*hoststar_mass)),data = data.train))
##                                                   2.5 %      97.5 %
## (Intercept)                               -0.0007198221 0.002284746
## I(troot(period * period * hoststar_mass))  0.0195473844 0.019560654
preds.train.bgnlm <- predict(bgnlm, data.train[,-1])
preds.test.bgnlm <- predict(bgnlm, data.test[,-1])
r.bgnlm <- round(c(cor(data.train[,1],preds.train.bgnlm$aggr$mean)^2,cor(data.test[,1],preds.test.bgnlm$aggr$mean)^2),3)
plot(x = data.train[, 1], preds.train.bgnlm$aggr$mean, xlab = "data", ylab = "prediction", main = "Title of the Plot", col = "blue", pch = 16)
points(x = data.test[, 1], preds.test.bgnlm$aggr$mean, col = "red", pch = 17)
legend("topright", legend = c(paste0("Training Data, R.sqr = ",r.bgnlm[1]), paste0("Test Data, R.sqr = ",r.bgnlm[2])), col = c("blue", "red"), pch = c(16, 17))


In the Bayesian Generalized Nonlinear Model (BGNLM) analysis, you obtained the following results:

Predictor: troot(((period*hoststar_mass)*period)) Inclusion Probability: 1.000000

This result indicates a perfect inclusion probability (1.0) for the predictor troot(((period*hoststar_mass)*period)), suggesting that this complex non-linear interaction term is crucial for predicting the semimajor axis.

Interpretation

1. Complex Interaction Term:

  • The predictor troot(((period*hoststar_mass)*period)) combines period and hoststar_mass in a multiplicative form, followed by a transformation.

  • troot likely represents a specific non-linear transformation, such as a root function, which means the predictor is a transformed version of the product of period, hoststar_mass, and period again.

Discussion with Relation to Kepler’s Third Law

1. Kepler’s Third Law

Kepler’s Third Law states that the square of the orbital period of a planet is proportional to the cube of the semimajor axis of its orbit.

2. Implications and Interpretation

  • Alignment with Physical Laws: The inclusion of troot(((period*hoststar_mass)*period)) in the BGNLM model supports Kepler’s Third Law by explicitly incorporating the cubic root transformation of the complicated interaction. This suggests that the model correctly captures the underlying (known in this case as Kepler’s Third Law) physical relationship between the orbital period and the semimajor axis.

  • Predictive Power and Physical Meaning: The perfect inclusion probability of this term indicates that the model effectively leverages the cubic root relationship to predict the semimajor axis. This reinforces the physical validity of the model and emphasizes the importance of incorporating both the period and the host star’s mass in a non-linear fashion.

2. Comparison with Previous Models:

  • Linear and Log-Transformed Models: These simpler models highlighted the importance of period and hoststar_mass individually, but the BGNLM model shows that their interaction, particularly in a non-linear form, is even more crucial.

  • Fractional Polynomials: The fractional polynomial model also indicated non-linear relationships but did not capture this specific interaction and it also missed the solar mass in its functional form, highlighting the added value of BGNLM in uncovering complex dependencies.

Predictions

preds.train.bgnlm <- predict(bgnlm, data.train[,-1])
preds.test.bgnlm <- predict(bgnlm, data.test[,-1])
r.bgnlm <- round(c(cor(data.train[,1],preds.train.bgnlm$aggr$mean)^2,cor(data.test[,1],preds.test.bgnlm$aggr$mean)^2),3)
plot(x = data.train[, 1], preds.train.bgnlm$aggr$mean, xlab = "data", ylab = "prediction", main = "Title of the Plot", col = "blue", pch = 16)
points(x = data.test[, 1], preds.test.bgnlm$aggr$mean, col = "red", pch = 17)
legend("topright", legend = c(paste0("Training Data, R.sqr = ",r.bgnlm[1]), paste0("Test Data, R.sqr = ",r.bgnlm[2])), col = c("blue", "red"), pch = c(16, 17))

The predictions are expectidely perfect on both training and testing sets, as inclusion of troot(((period*hoststar_mass)*period)) in the BGNLM model effectively captures the essence of Kepler’s Third Law, incorporating the orbital period and the host star’s mass in a cubic root transformation. This predictor reflects the key physical relationship between these variables, demonstrating that the model is not only statistically sound but also physically meaningful. The high inclusion probability underscores the importance of this non-linear interaction in accurately predicting the semimajor axis.

And use the g prior and redo the inference.

set.seed(1)
bgnlm <- FBMS::fbms(semimajoraxis ~ ., data = data.train,method = "gmjmcmc.parallel",beta_prior = list(type = "g-prior", g = nrow(data.train)),transforms = transforms,runs = 64,cores = 8,P = 40,probs = probs, params = params)
plot(bgnlm)

summary(bgnlm,labels = names(data.train)[-1])

Here we perfectly recover the true law! Also good convergence for g - prior.

diagn_plot(bgnlm,window = 10,FUN = median)

## $stat
##  [1]  898.2329  897.1210  898.4530  898.6731  898.6731  898.6731  909.1390
##  [8] 1129.0970 1238.0356 1276.4673 1270.5973 1270.5481 1282.2995 1289.1977
## [15] 1300.6593 1298.6057 1289.9745 1286.2103 1289.6226 1308.4389 1314.4066
## [22] 1309.8131 1310.6536 1312.7699 1310.8936 1312.8077 1306.9176 1314.7034
## [29] 1314.6228 1307.6979 1318.7803 1319.1866 1320.9995 1315.5996 1316.8580
## [36] 1322.5425 1311.4218 1317.0231 1319.7607 1314.6869
## 
## $lower
##  [1]  898.2329  895.5801  897.0536  897.3211  897.4059  897.4856  901.0399
##  [8]  970.0645  986.9912  970.2130  937.3096  917.2263  921.1179  933.1128
## [15]  963.4488  999.9810 1059.6222 1190.7452 1255.4991 1284.6599 1287.0899
## [22] 1283.9335 1288.4575 1291.6667 1290.5471 1291.7250 1286.0839 1295.5609
## [29] 1300.6921 1302.1599 1312.0201 1311.3235 1311.9782 1306.7350 1307.8984
## [36] 1312.7579 1301.4571 1308.7073 1311.2853 1306.2177
## 
## $upper
##  [1]  898.2329  898.6620  899.8525  900.0251  899.9404  899.8606  917.2380
##  [8] 1288.1295 1489.0800 1582.7216 1603.8850 1623.8700 1643.4811 1645.2826
## [15] 1637.8698 1597.2303 1520.3267 1381.6755 1323.7462 1332.2180 1341.7233
## [22] 1335.6927 1332.8497 1333.8731 1331.2401 1333.8904 1327.7514 1333.8459
## [29] 1328.5535 1313.2359 1325.5405 1327.0496 1330.0208 1324.4641 1325.8175
## [36] 1332.3271 1321.3866 1325.3389 1328.2361 1323.1562
diagn_plot(bgnlm,window = 10,FUN = max)

## $stat
##  [1]  898.6731  898.6731  921.6583 1085.1515 1240.8263 1270.5937 1330.8985
##  [8] 1331.2407 1332.1172 1367.6588 1367.6588 1357.3971 1367.6588 1367.6588
## [15] 1383.6693 1383.6693 1367.6588 1367.6588 1367.6588 1367.6588 1367.6588
## [22] 1367.6588 1370.7495 1370.7495 1367.6588 1370.7495 1367.6588 1370.7495
## [29] 1370.7495 1370.7495 1370.7495 1367.6588 1370.7495 1370.7495 1370.7495
## [36] 1361.6201 1368.5465 1370.7495 1370.7495 1370.7495
## 
## $lower
##  [1]  898.6731  898.6731  895.6486  908.6323  944.3674  932.7711  959.9434
##  [8]  949.9901  950.0324  982.0154  983.2045 1010.9078 1087.7685 1199.2737
## [15] 1295.8036 1318.9557 1329.0952 1333.8339 1340.9003 1352.7975 1352.7975
## [22] 1352.7975 1358.1973 1358.3675 1355.2767 1361.4137 1364.8292 1367.6932
## [29] 1367.5860 1367.5860 1367.6932 1364.6025 1367.9200 1367.9200 1367.9200
## [36] 1356.0371 1363.0230 1365.2462 1365.2462 1365.2462
## 
## $upper
##  [1]  898.6731  898.6731  947.6680 1261.6708 1537.2852 1608.4164 1701.8536
##  [8] 1712.4914 1714.2020 1753.3022 1752.1130 1703.8865 1647.5490 1536.0438
## [15] 1471.5351 1448.3830 1406.2223 1401.4837 1394.4172 1382.5201 1382.5201
## [22] 1382.5201 1383.3018 1383.1316 1380.0408 1380.0853 1370.4884 1373.8058
## [29] 1373.9131 1373.9131 1373.8058 1370.7151 1373.5791 1373.5791 1373.5791
## [36] 1367.2031 1374.0700 1376.2528 1376.2528 1376.2528

And as expected perfect predictions

preds.train.bgnlm <- predict(bgnlm, data.train[,-1])
preds.test.bgnlm <- predict(bgnlm, data.test[,-1])
r.bgnlm <- round(c(cor(data.train[,1],preds.train.bgnlm$aggr$mean)^2,cor(data.test[,1],preds.test.bgnlm$aggr$mean)^2),3)
plot(x = data.train[, 1], preds.train.bgnlm$aggr$mean, xlab = "data", ylab = "prediction", main = "Title of the Plot", col = "blue", pch = 16)
points(x = data.test[, 1], preds.test.bgnlm$aggr$mean, col = "red", pch = 17)
legend("topright", legend = c(paste0("Training Data, R.sqr = ",r.bgnlm[1]), paste0("Test Data, R.sqr = ",r.bgnlm[2])), col = c("blue", "red"), pch = c(16, 17))

We see that variation is possible in the results and that ideally maximal possible compute resources should be used to reduce the variation. But given enough resources GMJMCMC converges to being able to recover the true underlying physical law.